In [30]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
In [31]:
# Load seaborn's "titanic" example dataset into a DataFrame.
df = sns.load_dataset(name="titanic")
In [32]:
# Preview the first five rows to see the available columns.
df.head(n=5)
Out[32]:
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
In [33]:
# Age vs. fare for each titanic passenger, with points coloured by sex.
sns.scatterplot(data=df, x="age", y="fare", hue="sex")
Out[33]:
<Axes: xlabel='age', ylabel='fare'>
In [17]:
# Load seaborn's "tips" example dataset and preview its first five rows.
df2 = sns.load_dataset(name="tips")
df2.head(n=5)
Out[17]:
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
In [35]:
# Total bill vs. tip for every row of the tips dataset.
sns.scatterplot(data=df2, x="total_bill", y="tip")
Out[35]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [19]:
# Same bill-vs-tip scatter, with colour encoding the "smoker" column and
# marker style encoding the "sex" column.
sns.scatterplot(
    data=df2,
    x="total_bill",
    y="tip",
    hue="smoker",
    style="sex",
)
Out[19]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [46]:
# Joint view of total bill and tip (scatter plus marginal distributions),
# split by the "smoker" column.
sns.jointplot(data=df2, x="total_bill", y="tip", hue="smoker")
Out[46]:
<seaborn.axisgrid.JointGrid at 0x140d8ff9f50>
In [40]:
# Joint view of total bill and tip using seaborn's regression variant
# (kind="reg").
sns.jointplot(
    data=df2,
    x="total_bill",
    y="tip",
    kind="reg",
)
Out[40]:
<seaborn.axisgrid.JointGrid at 0x140d8228990>
In [22]:
# Pairwise Pearson correlation between the numeric columns of the tips data;
# non-numeric columns are skipped via numeric_only=True.
df2.corr(method="pearson", numeric_only=True)
Out[22]:
total_bill | tip | size | |
---|---|---|---|
total_bill | 1.000000 | 0.675734 | 0.598315 |
tip | 0.675734 | 1.000000 | 0.489299 |
size | 0.598315 | 0.489299 | 1.000000 |
In [44]:
# List the names of the example datasets seaborn exposes.
sns.get_dataset_names()
Out[44]:
['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'dowjones', 'exercise', 'flights', 'fmri', 'geyser', 'glue', 'healthexp', 'iris', 'mpg', 'penguins', 'planets', 'seaice', 'taxis', 'tips', 'titanic']
In [52]:
# Load seaborn's "dowjones" example dataset (Date / Price columns).
# NOTE: this rebinds `df`, which previously held the titanic frame; every
# cell from here on that uses `df` operates on the Dow Jones data.
df = sns.load_dataset(name="dowjones")
df.head(n=5)
Out[52]:
Date | Price | |
---|---|---|
0 | 1914-12-01 | 55.00 |
1 | 1915-01-01 | 56.55 |
2 | 1915-02-01 | 56.00 |
3 | 1915-03-01 | 58.30 |
4 | 1915-04-01 | 66.45 |
In [25]:
# Price over the full date range of the Dow Jones series.
sns.lineplot(data=df, x="Date", y="Price")
Out[25]:
<Axes: xlabel='Date', ylabel='Price'>
In [58]:
# Zoom in on the first ten rows of the series; the x tick labels are rotated
# 90 degrees before the figure is shown.
sns.lineplot(data=df.iloc[:10], x="Date", y="Price")
plt.xticks(rotation=90)
plt.show()
In [59]:
# Derive calendar year and month columns from the Date column so the series
# can be filtered by year and plotted by month.
df["year"], df["month"] = df["Date"].dt.year, df["Date"].dt.month
In [63]:
# Display the frame to confirm the year and month columns were added.
df
Out[63]:
Date | Price | year | month | |
---|---|---|---|---|
0 | 1914-12-01 | 55.00 | 1914 | 12 |
1 | 1915-01-01 | 56.55 | 1915 | 1 |
2 | 1915-02-01 | 56.00 | 1915 | 2 |
3 | 1915-03-01 | 58.30 | 1915 | 3 |
4 | 1915-04-01 | 66.45 | 1915 | 4 |
... | ... | ... | ... | ... |
644 | 1968-08-01 | 883.72 | 1968 | 8 |
645 | 1968-09-01 | 922.80 | 1968 | 9 |
646 | 1968-10-01 | 955.47 | 1968 | 10 |
647 | 1968-11-01 | 964.12 | 1968 | 11 |
648 | 1968-12-01 | 965.39 | 1968 | 12 |
649 rows × 4 columns
In [65]:
# Month-by-month price for the single year 1960.
sns.lineplot(
    data=df.loc[df["year"].eq(1960)],
    x="month",
    y="Price",
)
Out[65]:
<Axes: xlabel='month', ylabel='Price'>
In [ ]: